***************
* Title: gambia_ecd_edcc_dataclean_table4.do
* Author: Todd Pugatch
* Description: replication code for Blimpo, Carneiro, Jervis, and Pugatch,
*	"Improving Access and Quality in Early Childhood Development Programs: 
*		Experimental Evidence from The Gambia"
*	for Economic Development and Cultural Change
* Inputs: Gambia_ECD_Monitoring_3-6 community&annex_with settlement code STATA 12.dta, ECD_3to6_Gambia_cleanv1.dta
* Outputs: ECD_3to6_Gambia_monitoring_cleanv1.dta
* Notes: creates dataset used in Table 4
****************
#delimit;
/* Session setup. Stores the start time for the run-time report printed at the end of
   the file, empties memory (data, matrices, Mata, graphs), closes a log if one is open,
   turns off output paging, and defines the folder-name macros.
   NOTE(review): the scalar mode and the macros monitoring and data are not referenced
   by any other command visible in this file. */
local start "$S_TIME";

clear;
clear matrix;
clear mata;
graph drop _all;
capture log close;
set more off;

scalar mode = 3; /*1=Todd laptop, 2=Todd desktop, 3=Todd desktop 2, 4=Moussa*/

* folder names used to build file paths;
local monitoring "Monitoring";
local data "Data";
local cleandata "Data\cleaned";

* Load data entered by Yanbin (Tracy) Xu in Gambia_ECD_Monitoring_3-6 community&annex.do;
/*set directory:
	cd mydir
*/
/* Memory is cleared at the top of this do-file, so the monitoring data must be opened
   here before any cleaning command can run. Without a load step the first label
   command in the cleaning section stops with a variable-not-found error.
   NOTE(review): the file name is the monitoring input listed in the header of this
   do-file. It is opened relative to the working directory set above - confirm the
   folder before running. */
use "Gambia_ECD_Monitoring_3-6 community&annex_with settlement code STATA 12.dta", clear;

* CLEAN DATA;
* Note that cleaning is just additional cleaning on top of that done in Gambia_ECD_Monitoring_3-6 community&annex.do;
* Section 1: site characteristics;

* Section 2: facilitator characteristics (Q201-Q217);
/* Builds three education-threshold dummies, a qualified-teacher dummy and a numeric
   years-of-teaching variable from the raw facilitator questions. */
lab var q202female "female";

/* Education dummies: 1 when q205edulevel is at or above the cutoff, 0 otherwise,
   including when q205edulevel is missing. missing() is used rather than a comparison
   with . because extended missing codes (.a to .z) are not equal to . and sort above
   every number, so they would otherwise pass both tests and be coded 1. */
qui gen q205edulevel_primary_ormore=(q205edulevel>=6 & !missing(q205edulevel));
qui gen q205edulevel_lowsec_ormore=(q205edulevel>=9 & !missing(q205edulevel));
qui gen q205edulevel_sec_ormore=(q205edulevel>=12 & !missing(q205edulevel));
lab var q205edulevel_primary_ormore "completed at least primary";
lab var q205edulevel_lowsec_ormore "completed at least lower secondary";
lab var q205edulevel_sec_ormore "completed at least secondary";

* 1 when q207training is one of the codes 5, 6, 7 or 14, and 0 otherwise (missing included);
qui gen q207qualified=inlist(q207training,5,6,7,14);
lab var q207qualified "qualified teacher (PTC/HTC)";

/* q210teachinglength is stored as text. The force option turns every entry that is not
   a number into missing, and the text entry 6mths and facilitators with
   q209teachingexp equal to 0 are then set to 0 years. */
qui destring q210teachinglength, gen(q210teachexperience) force;
qui replace q210teachexperience=0 if q209teachingexp==0|q210teachinglength=="6mths";
* length of ECD experience is 0 when q211ECDexperience equals 0;
qui replace q212ECDlength=0 if q211ECDexperience==0;
lab var q210teachexperience "teaching experience, years";

* Section 2: facilitator & child absence (aQ201-aQ210);
/* Converts hours worked last week to a number and builds enrolment, attendance and
   attendance-rate variables for the day of the visit. */

/* Hours worked are stored as text. Entries that are not numbers become missing under
   the force option, and the text entry all week is then set to 40 hours. */
quietly destring a201wkhrslastweek, generate(a201workhrs) force;
quietly replace a201workhrs = 40 if a201wkhrslastweek == "all week";

* totals over boys and girls, missing when either component is missing;
quietly generate registered_children = a205registedboy + a206registedgirl;
label variable registered_children "number of registered children";

quietly generate present_children = a207presentboy + a208presentgirl;
label variable present_children "number of children present on day of visit";

/* Attendance rates are stored as the ratio present/registered, not multiplied by 100.
   A ratio is missing when the number registered is 0 or missing. */
quietly generate a207presentboy_pct = a207presentboy / a205registedboy;
label variable a207presentboy_pct "% of registered boys present on day of visit";

quietly generate a208presentgirl_pct = a208presentgirl / a206registedgirl;
label variable a208presentgirl_pct "% of registered girls present on day of visit";

quietly generate present_children_pct = present_children / registered_children;
label variable present_children_pct "% of registered children present on day of visit";

* Section 3: Syllabus delivery;
/* Splits the syllabus start date into month and year, flags whether a theme was named,
   flags full adherence to the syllabus, and recodes the adequate-materials answer. */
qui gen q303startsyllabus_month=month(q303startsyllabus);
qui gen q303startsyllabus_year=year(q303startsyllabus);
/* Dates in year 1900 are blanked out - presumably a placeholder for an unrecorded
   date, confirm. The month must be blanked first because its condition reads the year. */
qui replace q303startsyllabus_month=. if q303startsyllabus_year==1900;
qui replace q303startsyllabus_year=. if q303startsyllabus_year==1900;

/* 1 when at least one of the three theme variables holds a value. missing() is used
   rather than a comparison with . so that extended missing codes (.a to .z), which are
   not equal to ., do not count as a named theme. */
qui gen q304nametheme=(!missing(q304athemethisweek)|!missing(q304bthemethisweek)|!missing(q304cthemethisweek));
lab var q304nametheme "named theme from syllabus as this week's topic";

* 1 only when q305followsyllabus equals 1, every other value including missing is 0;
qui gen q305followsyllabus_full=(q305followsyllabus==1);
lab var q305followsyllabus_full "follows syllabus fully (partial/missing set to 0)";

* code 2 becomes 0 - presumably turning a 1/2 yes-no answer into 1/0, confirm;
recode q313adequatematerial (2=0);

* Section 4: Children's involvement;
/* For each of q401a to q401e, creates a dummy equal to 1 when the answer code is 3 and
   0 for any other answer. The if qualifier leaves the dummy missing whenever the
   answer is missing. missing() also covers extended missing codes (.a to .z), which a
   comparison with . would not catch and which would otherwise be coded 0. */
foreach x in a b c d e {;
	qui gen q401`x'_verymuch=(q401`x'==3) if !missing(q401`x');
};
la var q401a_verymuch "To what extent are children asking questions? response=very much";
la var q401b_verymuch "To what extent are children taking initiatives? response=very much";
la var q401c_verymuch "To what extent are children interacting among themselves? response=very much";
la var q401d_verymuch "To what extent are children listening and responding to facilitator? response=very much";
la var q401e_verymuch "To what extent are children interacting with toys and books? response=very much";

* MERGE WITH BASELINE/ENDLINE DATA;
/* Attaches site-level treatment variables (treatment, ip9 and their within-site means)
   from the cleaned baseline/endline file to the monitoring data, tabulates consistency
   checks, and saves the cleaned monitoring dataset.
   Intermediate datasets are held in tempfiles. Stata removes them automatically when
   the do-file ends, so nothing is left in the cleaned-data folder if the run stops
   early and no erase step is needed. */
* get just one observation from each site, to assign treatment status in monitoring data;
/*set directory:
	cd mydir
*/
tempfile monitoring_tmp sites_tmp;

/* Copy the site identifier under the key name used in the merge below.
   NOTE(review): gen uses the default storage type. If the codes are integers above
   16,777,216 and the default type is float, confirm no precision is lost. */
qui gen settlement_code=endline_settlecode;
qui save "`monitoring_tmp'";

* build a one-row-per-site file from the baseline/endline data;
qui use `cleandata'\ECD_3to6_Gambia_cleanv1, clear;
foreach x in ip9 treatment {;
	bysort settlement_code: egen `x'_mean=mean(`x'); /*non-integer values will reveal discrepancies in treatment status*/
	lab val `x'_mean `x';
};
* tag marks exactly one observation per settlement_code, only that one is kept;
qui egen tagged=tag(settlement_code);
qui keep if tagged==1;
qui save "`sites_tmp'";
qui use "`monitoring_tmp'", clear;

* select variables to keep in baseline/endline;
local X "treatment treatment_mean ip9 ip9_mean";
merge m:1 settlement_code using "`sites_tmp'", keepusing(`X');
* drop sites that appear only in the baseline/endline file;
qui drop if _merge==2;

* check treatment variables;
/*discrepancies in treatment status from baseline/endline (if non-integer values)*/
foreach x in ip9 treatment {;
	tab `x'_mean, mi;
};
tab ip9 treatment, mi;

/*discrepancies in baseline/endline treatment status and monitoring data*/
tab ip9 q105interventiontype, mi;
tab treatment q105interventiontype, mi;

* save data;
qui compress;
lab data "Gambia ECD Monitoring data, age 3-6 Annex and community-based, cleaned";
qui save `cleandata'\ECD_3to6_Gambia_monitoring_cleanv1, replace;
* report the clock time stored at the top of the file and the clock time now;
local end "$S_TIME";
display "`start'";
display "`end'";
